import requests
from bs4 import BeautifulSoup
class Scrawler:
    """Small scraping helper built on ``requests`` and BeautifulSoup.

    Every request is sent with a browser-like ``user-agent`` header and a
    timeout, and HTTP error statuses are raised instead of being scraped
    as if they were the requested page.
    """

    def __init__(self, timeout=10):
        """Create a scraper.

        Args:
            timeout: Seconds to wait for the server before giving up, passed
                straight to ``requests.get``. ``None`` disables the timeout.
        """
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95'
        }
        self.timeout = timeout

    def _fetch(self, url):
        """GET *url* with the scraper's headers and return the response.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Fail loudly: otherwise an error page would be parsed (or saved as
        # an image) as though it were the real content.
        response.raise_for_status()
        return response

    def _select(self, url, selector):
        """Download *url* and return the elements matching CSS *selector*."""
        soup = BeautifulSoup(self.getCodes(url), 'html.parser')
        return soup.select(selector=selector)

    def _attr_values(self, url, selector, attr):
        """Return the *attr* value of each matched element that has one."""
        # Elements without the attribute (e.g. <a name="x">) are skipped
        # rather than aborting the whole scrape with a KeyError.
        return [
            item.attrs[attr]
            for item in self._select(url, selector)
            if attr in item.attrs
        ]

    @staticmethod
    def _image_name_from_url(url):
        """Derive a local file name from the last path segment of *url*.

        Raises:
            ValueError: If the URL path has no usable final segment.
        """
        from posixpath import basename
        from urllib.parse import unquote, urlparse

        # Decode first so an encoded "/" cannot smuggle a directory part
        # into the name; basename then keeps only the final segment.
        name = basename(unquote(urlparse(url).path))
        if name in ('', '.', '..'):
            raise ValueError(
                'cannot derive a file name from %r; pass imgName explicitly' % (url,)
            )
        return name

    def getCodes(self, url):
        """Return the body of *url* decoded as text."""
        return self._fetch(url).text

    def getTargetTexts(self, url, selector):
        """Return the plain text of every element matching *selector*."""
        return [item.text for item in self._select(url, selector)]

    def getTargethrefs(self, url, selector):
        """Return the ``href`` of every matching element that has one."""
        return self._attr_values(url, selector, 'href')

    def getImgUrl(self, url, selector):
        """Return the ``src`` of every matching element that has one."""
        return self._attr_values(url, selector, 'src')

    def getImg(self, url, imgName=None):
        """Download *url* and write its raw bytes to a file.

        Args:
            url: Address of the image to download.
            imgName: Destination path. When ``None``, the final segment of
                the URL path is used as a file name in the current directory.

        Returns:
            The path the bytes were written to.

        Raises:
            ValueError: If ``imgName`` is ``None`` and no name can be derived
                from *url*.
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        if imgName is None:
            # Resolve the name before any network traffic so an unusable
            # URL fails fast.
            imgName = self._image_name_from_url(url)
        content = self._fetch(url).content
        with open(imgName, 'wb') as f:
            f.write(content)
        return imgName